# coding=utf-8
import codecs
import logging
import sys

import os
from pyquery import PyQuery as pq

# Batch text extraction: for every regular file directly inside
# ``source_rootdir``, parse it with PyQuery, take the document's text content
# and write it to ``output_dir`` as ``origin_<original file name>``.

# Python 2 only: ``sys.setdefaultencoding`` is removed from ``sys`` at
# interpreter start-up, so the module has to be reloaded to get it back.
# Setting the default to UTF-8 lets implicit str <-> unicode conversions
# handle non-ASCII characters.
reload(sys)
sys.setdefaultencoding("utf-8")

output_dir = 'D:/03.Documents/bxjg_file'
source_rootdir = 'D:/03.Documents/bxjg'

# List every directory and file under the source folder. Passing a unicode
# path makes os.listdir return unicode entry names.
entry_names = os.listdir(unicode(source_rootdir, 'utf-8'))

# Make sure the destination exists; otherwise every single write below would
# fail for the same reason.
if not os.path.isdir(output_dir):
    os.makedirs(output_dir)

for entry_name in entry_names:
    path = os.path.join(source_rootdir, entry_name)
    if not os.path.isfile(path):
        # Only regular files are processed; sub-directories are skipped.
        continue
    try:
        d = pq(filename=path, encoding='utf-8-sig')
        full_text = d.text()
        output_file = os.path.join(output_dir, "origin_" + entry_name)
        # 'utf-8-sig' writes a UTF-8 byte order mark at the start of the file.
        with codecs.open(output_file, 'wb', encoding='utf-8-sig') as f:
            f.write(full_text)
    except Exception:
        # Best effort: one bad file must not abort the whole batch. Catching
        # Exception (not BaseException) keeps Ctrl-C / SystemExit working, and
        # logging at ERROR level with the traceback makes the failure visible
        # under the default logging configuration (INFO records are dropped).
        logging.exception("Failed to extract text from %s", path)
